#!/usr/bin/perl
#
# APPR - Automated Profiling and Performance Regression - is an
# extensible performance profiling framework for OpenOffice.org.
# 
# Copyright (C) 2005 Intel Corporation
# 
# GNU Lesser General Public License Version 2.1
# ======================================================================
# 
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation; either version 2.1 of the
# License, or (at your option) any later version.
# 
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA
# 
# ======================================================================

use strict;
use Getopt::Long;
use XML::Mini::Document;
use Data::Dumper;

sub mark_dlopen;
sub find_duplicate_lstat;
sub print_duplicate_lstat;
sub do_RTL_PERF;
sub do_RTL_modules;
sub do_RTL_START_END;
sub dump_RTL;
sub do_dlopen;
sub print_dlopen;
sub do_ld_output;
sub dump_ld_output;
sub find_strace_begin;
sub do_monitor_output;
sub do_vtune_output;

my $strace = '/dev/null';
my $RTL_LOG = '/dev/null'; 
my $ld_log = '/dev/null';
my $monitor_log = '/dev/null';
my $vtune_log = '/dev/null';
my $help_option;
my $output = 'report.xml';
my $strace_begin= 0;
my $RTL_END_TIME;

GetOptions ('strace=s' => \$strace,
	    # 'RTL=s' => \$RTL_LOG,
	    'ooo-rtl=s' => \$RTL_LOG,
	    # 'ld_log=s' => \$ld_log,
	    'ld-debug=s' => \$ld_log,
	    'outfile=s' => \$output,
	    'monitor=s' => \$monitor_log,
	    'vtune=s' => \$vtune_log,
	    'help' => \$help_option);
if ($help_option)
{
    print STDERR "Usage: report_generator arguments\n";
    print STDERR "  --strace=<strace_log_file>\n";
    print STDERR "  --RTL=<rtl_log_file>\n";
    print STDERR "  --monitor=<monitor_log_file>\n";
    print STDERR "  --vtune=<vtune_log_file>\n";
    print STDERR "  --ld_log=<ld_output_file>\n";
    print STDERR "  --outfile=<output_xml_file>\n";
    die "Bug author do do this properly with Pod::Usage\n";
}

open ST, "<$strace" or die "Couldn't open strace file: $strace\n";
open RTL, "<$RTL_LOG" or die "Couldn't open RTL_LOG file: $RTL_LOG\n";
open LD, "<$ld_log" or die "Couldn't open ld_log file: $ld_log\n";
open M, "<$monitor_log" or die "Couldn't open monitor log file: $monitor_log\n";
open VT, "<$vtune_log" or die "Couldn't open vtune log file: $vtune_log\n";
open O, ">$output" or die "Couldn't open outfile: $output\n";
my $newDoc= XML::Mini::Document->new();
my $newDocRoot= $newDoc->getRoot();
my $xmlHeader = $newDocRoot->header('xml');
$xmlHeader->attribute('version', '1.0');
my $report= $newDocRoot->createChild('APPR_report');

my $strace_line;
while (<ST>)
{
    $strace_line= $_;
    find_strace_begin;
    do_dlopen;
    find_duplicate_lstat;
}
my @strace_end_line_array=split /[\s]+/, $strace_line;
my $strace_end_time=$strace_end_line_array[1];
print_dlopen;
print_duplicate_lstat;
while (<RTL>)
{
    do_RTL_START_END;
    do_RTL_PERF;
    do_RTL_modules 'configmgr.*', 'config_mgr';
}
dump_RTL;
while (<LD>)
{
    do_ld_output;
}
dump_ld_output;
while (<M>)
{
    do_monitor_output;
}
do_vtune_output;

my $Runtime= $report->createChild('RunTime');
$Runtime->createChild('Start')->text($strace_begin);
#$Runtime->createChild('End')->text($RTL_END_TIME);
$Runtime->createChild('End')->text($strace_end_time);
print O $newDoc->toString();

#
# the functions below are a sort of kludge
# note the backwards sorting comparision
#
#
BEGIN
{
    my %statted_file;
    
# big kludge: reverse comparison!
    sub by_times
    {
	$statted_file{$b} <=> $statted_file{$a} 
    }

sub find_duplicate_lstat
{
    my $file;
    if (/stat64\(\"(.*)\"/)
    {
	if ($statted_file{$1})
	{
	    $statted_file{$1}= $statted_file{$1} + 1;
	}
	else
	{ $statted_file{$1}= "1"; }
    }	
}

sub print_duplicate_lstat
{    
    my $file;
    my $stat= $report->createChild('stat');
    $stat->comment("Top 10 files by number of times statted");
    my $counter= 0; #kludge.  There must be a better way
    for $file (sort by_times keys %statted_file) 
    {   
	if ($counter < 10)
	{
	    $stat->createChild('path')->text($file);
	    $stat->createChild('times')->text($statted_file{$file});	    
	    $counter++;
	}
	else
	{
	    return;
	}
    }
}
}

BEGIN
{
    my @module_times;
    my @perf_markers;
    my $START_END_FOUND= 0;
    my $RTL_START_TIME;
    sub do_RTL_START_END
    {
	unless ($START_END_FOUND)
	{
	    if (/opening log file/)
	    {
		my @st= split / /;
		$RTL_START_TIME= $st[3];
	    }
	    if (/closing log file at ([\d]+)/)
	    {
		$RTL_END_TIME= $RTL_START_TIME + $1 / 1000;
		$START_END_FOUND= 1;
	    }
	}
    }
sub do_RTL_PERF{
	# match for a timestamped line (for reference later)
	#if (/([\d]+) [\d]+ ([|{}]) (.*)/)

	if (/([\d]+) [\d]+ ([|{}]).*PERFORMANCE - (.*)/)
	{
	    my $rec;
	    $rec->{"time"}= $1 / 1000 + $RTL_START_TIME;
	    $rec->{"type"}= $2;
	    $rec->{"string"}= $3;	    
	    push @perf_markers, $rec;
	}
    }

# the issue:
#  it assumes that you are not interested in one module that calls another
#  ie, you cannot do search for config and vcl, b/c vcl calls config
sub do_RTL_modules
{
    my $module_regexp= shift;
    my $module_name= shift;
    # module starts
    if (/([\d]+) [\d]+ \{ ($module_regexp)/)
    {
#	print "start\n";
	my $rec;
	if (@module_times)
	{
	    $rec= $module_times[$#module_times];
	    if ($rec->{"end_time"} != -1)
	    {
		my $rec2;
		$rec2->{"module"}= $module_name;
		$rec2->{"start_time"}= $1 / 1000 + $RTL_START_TIME;
		$rec2->{"string"}= $2;
		$rec2->{"end_time"}= -1; #special flag
		push @module_times, $rec2;
	    }
	}
	else
	{
	    $rec->{"module"}= $module_name;
	    $rec->{"start_time"}= $1 / 1000 + $RTL_START_TIME;
	    $rec->{"string"}= $2;
	    $rec->{"end_time"}= -1; #special flag
	    push @module_times, $rec;
	}
    } 

    #module ends
    if (/([\d]+) [\d]+ \} ($module_regexp)/)
    {
#	print "end\n";
	my $rec;
	if (@module_times)
	{
	    $rec= $module_times[$#module_times];
	    if (($rec->{"end_time"} == -1) &&
		($rec->{"string"} eq $2))
	    {
		$rec->{"end_time"}= $1 / 1000 + $RTL_START_TIME;
	    }
	}
	else
	{
	    print STDERR "This should never happen\n";
	}
    }
}
sub dump_RTL
{
    my $i;
    my $total= 0;
    for $i (0 .. $#module_times)
    {
	my $module= $report->createChild('Module');
	$module->createChild('mod_name')->text($module_times[$i]{module});
	$module->createChild('start_time')->text($module_times[$i]{start_time});
	$module->createChild('end_time')->text($module_times[$i]{end_time});
	$module->createChild('string')->text($module_times[$i]{string});
    }
    for $i (0 .. $#perf_markers)
    {
	my $marker= $report->createChild('PERF_marker');
	$marker->createChild('time')->text($perf_markers[$i]{"time"});	
	$marker->createChild('type')->text($perf_markers[$i]{"type"});	
	$marker->createChild('string')->text($perf_markers[$i]{"string"});
    }
    
}

}

BEGIN
{
my %dlopen;
my $fc=0;

# this function generates warnings  FIXME
# check for activities in a sequence corresponding to dlopening libs
sub do_dlopen {  
	# stores data in %dlopen 
	# TOTALTIME =>
	# $fd => hash{DLOK, OPENTIME, FNAME, OPENCTIME, TOTALTIME}
	if (/\d*\s(\d.*) close\((\d+)\).*<(.*)>/) {
		my ($t, $fd, $ct) = ($1, $2, $3);
		if ($dlopen{$fd}) {
			if ($dlopen{$fd}{'DLSEQ'} =~ /open|read|fstat|(mmap|).*/) {
				$dlopen{$fd}{'DLOK'} = 1;
			}
			if ($dlopen{$fd}{'DLOK'}) {
				# we are processing this file as dlopen
				# gather final statistics
				my $thistime = $t - $dlopen{$fd}{'OPENTIME'} + $ct;
				$dlopen{$fd}{'TOTALTIME'} = $thistime;
				$dlopen{'TOTALTIME'} += $thistime;
			}
			# hash to permanent id 
			my $did = 'DL-' . $fc++;
			$dlopen{$did} = $dlopen{$fd};
			$dlopen{$fd} = 0;
			return;
		}
	}
	if (/\d.*\s(\d.*) open\(\"(.*lib.*\.so.*)\".*=\s(\d+)\s<(.*)>/) {
		# begin new sequence 
		my ($t, $fname, $fd, $ct) = ($1, $2, $3, $4);
		$dlopen{$fd} = {'OPENTIME' => $t,
						'FNAME' => $fname,
						'OPENCTIME' => $ct,
						'DLOK' => 0, 
						'DLSEQ' => 'open|', }
	}
	if (/read\((\d+),/ and $dlopen{$1}) {
		$dlopen{$1}{'DLSEQ'} .= 'read|';
	}
	if (/fstat64\((\d+),/ and $dlopen{$1}) {
		$dlopen{$1}{'DLSEQ'} .= 'fstat|';
	}
	if (/old_mmap/) {
		my @ff = split /,/;
		my $fd = int($ff[4]);
		if ($dlopen{$fd}) {
			$dlopen{$fd}{'DLSEQ'} .= 'mmap|';
		}
	}
	if (/madvise/)		{ } # can't correlate with fd
}

sub print_dlopen {
	foreach my $k (keys %dlopen) {
		next if not ($k =~ /DL-/); 
		my $lib_open= $report->createChild('library_load');
		$lib_open->createChild('path')->text($dlopen{$k}{'FNAME'});
		$lib_open->createChild('time')->text($dlopen{$k}{'OPENTIME'});
	}
}
}

BEGIN
{
    my $tot_start_time;
    my $time_reloc;
    my $num_reloc;
    my $num_reloc_cache;
    my $num_rel_reloc;
    my $time_load_obj;
    my $fin_num_reloc;
    my $fin_num_reloc_cache;
    sub do_ld_output
    {
	if (/total startup time in dynamic loader: (\d*)/)
	{
	   $tot_start_time= $1; 
	}
	if (/time needed for relocation: (\d*)/)
	{
	   $time_reloc= $1; 
	}
	if (/number of relocations: (\d*)/)
	{
	    $num_reloc= $1;  
	}
	if (/number of relocations from cache: (\d*)/)
	{
	    $num_reloc_cache= $1;
	}
	if (/number of relative relocations: (\d*)/)
	{
	    $num_rel_reloc= $1; 
	}
	if (/time needed to load objects: (\d*)/)
	{
	    $time_load_obj= $1;
	}
	if (/final number of relocations: (\d*)/)
	{
	    $fin_num_reloc= $1;
	}
	if (/final number of relocations from cache: (\d*)/)
	{
	   $fin_num_reloc_cache= $1; 
	}
    }

sub dump_ld_output
{
    my $MHZ;
    open CPUINFO, '</proc/cpuinfo' or die "Couldn't open /proc/cpuinfo\n";
    while (<CPUINFO>)
    {
	if (/cpu MHz\s*:\s*([\d\.]*)/)
	{
	    $MHZ= $1;
	}
    }
    my $ld_stats= $report->createChild('ld_debug');
    $ld_stats->comment("Statistics_from_ld.so");
    $ld_stats->createChild("total_startup_time_in_dynamic_loader")->text($tot_start_time / $MHZ / 1e6);
    $ld_stats->createChild("time_needed_for_relocation")->text($time_reloc / $MHZ / 1e6);    
    $ld_stats->createChild("number_of_relocations")->text($num_reloc);
    $ld_stats->createChild("number_of_relocations_from_cache")->text($num_reloc_cache);
    $ld_stats->createChild("number_of_relative_relocations")->text($num_rel_reloc);
    $ld_stats->createChild("time_needed_to_load_objects")->text($time_load_obj / $MHZ / 1e6);
    $ld_stats->createChild("final_number_of_relocations")->text($fin_num_reloc);
    $ld_stats->createChild("final_number_of_relocations_from_cache")->text($fin_num_reloc_cache);
}
}

sub find_strace_begin
{
    if (/\d+\s+([\d\.]+)\s/ && !$strace_begin)
    {
	$strace_begin= $1;
    }
}

BEGIN 
{
my %vtir;
my %vtct;
my %vtall;
sub do_vtune_output {
	my $current; 
	my $indx;
	my $hashref;
	return if ($vtune_log eq '/dev/null');
	while (<VT>) {
		if (/--Event Instructions Retired--/) {
			$hashref = \%vtir;
		}
		if (/--Event Clockticks--/) {
			$hashref = \%vtct;
		}
		if (/\[soffice.bin - modules\]/) {
			$current = 'MODULES';
			$indx = 0;
		}
		if (/\[(.*)\s- functions\]/) {
			$current = $1;
			$indx = 0;
		}
		if (/([0-9.].*)%\s[0-9]+\s(.*)/) {
			$$hashref{$current}[$indx++] = {'PCT' => $1, 'NAME' => $2};
		}
	}
	for (my $i = 0; $i < 2; $i++) {
		my $met;
		if ($i == 0) { 
			$hashref = \%vtir; 
			$met = 'IR';
		} elsif ($i == 1) {
			$hashref = \%vtct;
			$met = 'CT';
		}
		foreach my $m (@{$$hashref{'MODULES'}}) {
			my $mod = $m->{'NAME'};
			foreach my $f (@{$$hashref{"$mod"}}) {
				my $func = $f->{'NAME'};
				my $mf = $mod . '::' . $func;
				$vtall{$mf}{$met} = $m->{'PCT'} * $f->{'PCT'} / 100;
			}
		}
	}
	my $vtrep = $report->createChild('VTune');
	foreach my $k (keys %vtall) {
		my $vtdata = $vtrep->createChild('VTD');
		$vtdata->createChild('modfun')->text($k);
		my $ct = $vtall{$k}{'CT'};
		my $ir = $vtall{$k}{'IR'};
		$ct = 0 if (not $ct);
		$ir = 0 if (not $ir);
		$vtdata->createChild('CT')->text($ct);
		$vtdata->createChild('IR')->text($ir);
	}
}
}

sub do_monitor_output
{
    if (/<.*>/)
    {
	my $monitor= $report->createChild('Monitor');
	my @m_line= split / /;
	
	my $i;
	for ($i= 1; $i < $#m_line; $i++)
	{
	    my @line= split /=/,$m_line[$i];
	    for ($line[1])
	    {
		s/[\"]+//g;
	    }
	    $monitor->createChild($line[0])->text($line[1]);
	}
    }
}
